***************************************************************************
***************************************************************************
**Replication files for Enns, Moehlecke and Wlezien: "Detecting True 
**Relationships in Time Series Data with Different Orders of Integration"**
**Table A-1, appendix******************************************************
**Last run: Feb 6th, 2021************************************************** 
***************************************************************************
***************************************************************************

****************************************
**Simulation Code to Replicate Table A-1
****************************************

**********************************
*Table A-1 layout
**********************************
*generate excel file to store Table 1 output
putexcel set tablea1.xlsx, sheet(sheet1) replace
*add labels to table
*row labels
putexcel A4 = "$\hat{\alpha}_1$"
putexcel A5 = "$\hat{\alpha^*}_1$"
putexcel A6 = "$\hat{\beta}_1$"
putexcel A7 = "$\hat{\beta}_2$"
putexcel A8 = "$\hat{\beta^*}_2$"

*column labels
putexcel B1 = "ADL"
putexcel B2 = "$\rho = 0.2$"
putexcel B3 = "coef"
putexcel C3 = "%"
putexcel D2 = "$\rho = 0.5$"
putexcel D3 = "coef"
putexcel E3 = "%"
putexcel F2 = "$\rho = 0.8$"
putexcel F3 = "coef"
putexcel G3 = "%"

putexcel H1 = "GCM"
putexcel H2 = "$\rho = 0.2$"
putexcel H3 = "coef"
putexcel I3 = "%"
putexcel J2 = "$\rho = 0.5$"
putexcel J3 = "coef"
putexcel K3 = "%"
putexcel L2 = "$\rho = 0.8$"
putexcel L3 = "coef"
putexcel M3 = "%"

putexcel N1 = "First Difference"
putexcel N2 = "$\rho = 0.2$"
putexcel N3 = "coef"
putexcel O3 = "%"
putexcel P2 = "$\rho = 0.5$"
putexcel P3 = "coef"
putexcel Q3 = "%"
putexcel R2 = "$\rho = 0.8$"
putexcel R3 = "coef"
putexcel S3 = "%"



************************
**T=5000; x1 = I(0), rho=.2, .5, .8
**x2 = I(1)
**y = x1 + x2 
*************************

*************************************************************************
*************************************************************************
*ADL
*************************************************************************
*************************************************************************
*seed selected based on random.org, b/t 1 and 1000000000 - same seed used for Table 1 so that results are directly comparable
set seed 457090451

**********
**rho=.2
**********
*program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg y l.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq


sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_3 = _b[L.x1]
sum _sim_1 _b_x1 _sim_3


*export results from ADL rho = 0.2 to excel file
**$\hat{\alpha}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel B4 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_5) if abs(_sim_1/_sim_5)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel C4 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel B6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1) >=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel C6 = `perc'

**$\hat{\beta}_2$
*ceof
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel B7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel C7 = `perc'



**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg y l.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_3 = _b[L.x1]
sum _sim_1 _b_x1 _sim_3


*export results from ADL rho = 0.5 to excel file
**$\hat{\alpha}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel D4 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_5) if abs(_sim_1/_sim_5)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel E4 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel D6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1) >=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel E6 = `perc'

**$\hat{\beta}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel D7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel E7 = `perc'


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg y l.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_3 = _b[L.x1]
sum _sim_1 _b_x1 _sim_3


*export results from ADL rho = 0.8 to excel file
**$\hat{\alpha}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel F4 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_5) if abs(_sim_1/_sim_5)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel G4 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel F6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1) >=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc'
putexcel G6 = `perc'

**$\hat{\beta}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel F7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel G7 = `perc'



*****************************************************************************
*************************************************************************
*FIRST-DIFFERENCE DV
*************************************************************************
*************************************************************************
*Need to reset the seed to match the ADL analysis to allow direct comparison of simulations
set seed 457090451

**********
**rho=.2
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg d.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq

sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_2 = _b[L.x1]
sum _b_x1 _sim_2


*export results from First Difference rho = 0.2 to excel file
**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel N6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1)>=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc' 
putexcel O6 = `perc'

**$\hat{\beta^}_2$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel N7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_2/_sim_5) if abs(_sim_2/_sim_5) >=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel O7 = `perc'

**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg d.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq

sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_2 = _b[L.x1]
sum _b_x1 _sim_2

*export results from First Difference rho = 0.5 to excel file
**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel P6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1)>=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc' 
putexcel Q6 = `perc'

**$\hat{\beta^}_2$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel P7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_2/_sim_5) if abs(_sim_2/_sim_5) >=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel Q7 = `perc'


**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg d.y x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq

sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_2 = _b[L.x1]
sum _b_x1 _sim_2

*export results from First Difference rho = 0.8 to excel file
**$\hat{\beta^}_1$
*coef
quietly sum _b_x1
di %6.4f `r(mean)'
putexcel R6 = `r(mean)', nformat("#0.####")
*%
gen tstat_x1 = abs(_b_x1/_se_x1) if abs(_b_x1/_se_x1)>=1.96
quietly sum tstat_x1
local perc = 100*`r(N)'/2000
di `perc' 
putexcel S6 = `perc'

**$\hat{\beta^}_2$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel R7 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_2/_sim_5) if abs(_sim_2/_sim_5) >=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel S7 = `perc'


*************************************************************************
*************************************************************************
*GECM
*************************************************************************
*************************************************************************

*Need to reset the seed to match the ADL analysis to allow direct comparison of simulations
set seed 457090451

**********
**rho=.2
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.2*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg d.y l.y d.x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_2 = _b[D.x1]
*_sim_3 = _b[L.x1]
sum  _sim_1 _sim_2 _sim_3


*export results from GECM rho = 0.2 to excel file
**$\hat{\alpha^*}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel H5 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_6) if abs(_sim_1/_sim_6)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel I5 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel H6 = `r(mean)', nformat("#0.####")
*%
gen tstat_dx1 = abs(_sim_2/_sim_6) if abs(_sim_2/_sim_6) >=1.96
quietly sum tstat_dx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel I6 = `perc'

**$\hat{\beta^*}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel H8 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel I8 = `perc'


**********
**rho=.5
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.5*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg d.y l.y d.x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_2 = _b[D.x1]
*_sim_3 = _b[L.x1]
sum  _sim_1 _sim_2 _sim_3

*export results from GECM rho = 0.5 to excel file
**$\hat{\alpha^*}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel J5 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_6) if abs(_sim_1/_sim_6)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel K5 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel J6 = `r(mean)', nformat("#0.####")
*%
gen tstat_dx1 = abs(_sim_2/_sim_6) if abs(_sim_2/_sim_6) >=1.96
quietly sum tstat_dx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel K6 = `perc'

**$\hat{\beta^*}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel J8 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel K8 = `perc'



**********
**rho=.8
**********
program drop combined_noq
program define combined_noq, rclass
drop _all
set obs 5000
gen t = _n
tsset t
*gen stationary time series (x1)
gen e1=invnorm(uniform())
gen x1=e1 if t==1
replace x1=.8*x1[_n-1] + e1 if t>1
*gen integrated time series (x2)
gen u=invnorm(uniform())
gen x2=u if t==1
replace x2=x2[_n-1] + u if t>1
*gen combined times series (z) that his a function of x1 and x2
gen q=invnorm(uniform())
gen y = x1 + x2 + q
reg d.y l.y d.x1 l.x1
	estat bgodfrey
	mat P = r(p)
	return scalar pvalue_bg = P[1,1] 
end

*Simulate the program "combined" N times and save the betas and standard errors.
*Test whether an equation with mixed orders of integration (combined z, I(0) x1, I(1) x2)
*can correctly identify TRUE relationships
simulate pvalue_bg=r(pvalue_bg) _b _se, reps(2000) nodots: combined_noq



sum _eq2_pvalue_bg if _eq2_pvalue_bg<0.05
*_sim_1 = _b[L.y]
*_sim_2 = _b[D.x1]
*_sim_3 = _b[L.x1]
sum  _sim_1 _sim_2 _sim_3


*export results from GECM rho = 0.8 to excel file
**$\hat{\alpha^*}_1$
*coef
quietly sum _sim_1
di %6.4f `r(mean)'
putexcel L5 = `r(mean)', nformat("#0.####")
*%
gen tstat_ly = abs(_sim_1/_sim_6) if abs(_sim_1/_sim_6)>=1.96
quietly sum tstat_ly
local perc = 100*`r(N)'/2000
di `perc' 
putexcel M5 = `perc'

**$\hat{\beta}_1$
*coef
quietly sum _sim_2
di %6.4f `r(mean)'
putexcel L6 = `r(mean)', nformat("#0.####")
*%
gen tstat_dx1 = abs(_sim_2/_sim_6) if abs(_sim_2/_sim_6) >=1.96
quietly sum tstat_dx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel M6 = `perc'

**$\hat{\beta^*}_2$
*coef
quietly sum _sim_3
di %6.4f `r(mean)'
putexcel L8 = `r(mean)', nformat("#0.####")
*%
gen tstat_lx1 = abs(_sim_3/_sim_7) if abs(_sim_3/_sim_7)>=1.96
quietly sum tstat_lx1
local perc = 100*`r(N)'/2000
di `perc'
putexcel M8 = `perc'

